library("here")
## here() starts at /Users/ayu.3646/Homework1
chocolate <- readRDS(here("data", "chocolate.RDS"))
This is my solution for Project 1
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
glimpse(chocolate)
## Rows: 2,530
## Columns: 10
## $ ref <dbl> 2454, 2458, 2454, 2542, 2546, 2546, 2…
## $ company_manufacturer <chr> "5150", "5150", "5150", "5150", "5150…
## $ company_location <chr> "U.S.A.", "U.S.A.", "U.S.A.", "U.S.A.…
## $ review_date <dbl> 2019, 2019, 2019, 2021, 2021, 2021, 2…
## $ country_of_bean_origin <chr> "Tanzania", "Dominican Republic", "Ma…
## $ specific_bean_origin_or_bar_name <chr> "Kokoa Kamili, batch 1", "Zorzal, bat…
## $ cocoa_percent <chr> "76%", "76%", "76%", "68%", "72%", "8…
## $ ingredients <chr> "3- B,S,C", "3- B,S,C", "3- B,S,C", "…
## $ most_memorable_characteristics <chr> "rich cocoa, fatty, bready", "cocoa, …
## $ rating <dbl> 3.25, 3.50, 3.75, 3.00, 3.00, 3.25, 3…
library(dplyr)
library(ggplot2)
# Histogram of all ratings with hist()'s default binning.
hist(chocolate$rating)
# Redraw with 10, 15, 20 and 25 equal-width bins. A histogram with k bins
# needs k + 1 break points spread over the observed rating range.
for (n_bins in c(10, 15, 20, 25)) {
  hist(
    chocolate$rating,
    breaks = seq(
      min(chocolate$rating),
      max(chocolate$rating),
      length.out = n_bins + 1
    )
  )
}
head(chocolate, 5)
## # A tibble: 5 × 10
## ref company_manufacturer company_location review_date country_of_bean_origin
## <dbl> <chr> <chr> <dbl> <chr>
## 1 2454 5150 U.S.A. 2019 Tanzania
## 2 2458 5150 U.S.A. 2019 Dominican Republic
## 3 2454 5150 U.S.A. 2019 Madagascar
## 4 2542 5150 U.S.A. 2021 Fiji
## 5 2546 5150 U.S.A. 2021 Venezuela
## # ℹ 5 more variables: specific_bean_origin_or_bar_name <chr>,
## # cocoa_percent <chr>, ingredients <chr>,
## # most_memorable_characteristics <chr>, rating <dbl>
#I evaluated the histograms with different numbers of bins and found that the histogram with 15 bins provides the best balance between detail and clarity. With too few bins (10), the distribution appears too generalized, obscuring finer details. Conversely, too many bins (25) create a fragmented view that adds unnecessary noise. The 15-bin histogram offers a clear visualization of the overall distribution, capturing key features of the data without overwhelming the viewer.
table(chocolate$country_of_bean_origin)
##
## Australia Belize Blend
## 3 76 156
## Bolivia Brazil Burma
## 80 78 1
## Cameroon China Colombia
## 3 1 79
## Congo Costa Rica Cuba
## 11 43 12
## Dominican Republic DR Congo Ecuador
## 226 1 219
## El Salvador Fiji Gabon
## 6 16 1
## Ghana Grenada Guatemala
## 41 19 62
## Haiti Honduras India
## 30 25 35
## Indonesia Ivory Coast Jamaica
## 20 7 24
## Liberia Madagascar Malaysia
## 3 177 8
## Martinique Mexico Nicaragua
## 1 55 100
## Nigeria Panama Papua New Guinea
## 3 9 50
## Peru Philippines Principe
## 244 24 1
## Puerto Rico Samoa Sao Tome
## 7 3 14
## Sao Tome & Principe Sierra Leone Solomon Islands
## 2 4 10
## Sri Lanka St. Lucia St.Vincent-Grenadines
## 2 10 1
## Sulawesi Sumatra Suriname
## 1 1 1
## Taiwan Tanzania Thailand
## 2 79 5
## Tobago Togo Trinidad
## 2 3 42
## U.S.A. Uganda Vanuatu
## 33 19 13
## Venezuela Vietnam
## 253 73
# Reviews of chocolate made with beans from Ecuador.
country_Ecuador <- chocolate %>%
  filter(country_of_bean_origin == "Ecuador")
# Standard deviation, number of reviews and mean of the Ecuador ratings.
sd(country_Ecuador$rating)
## [1] 0.5122678
length(country_Ecuador$rating)
## [1] 219
mean(country_Ecuador$rating)
## [1] 3.164384
# Store the three statistics, then collect them in a one-row data frame.
total_reviews <- length(country_Ecuador$rating)
sd_rating <- sd(country_Ecuador$rating)
mean_rating <- mean(country_Ecuador$rating)
ecuador_summary <- data.frame(
  mean = mean_rating,
  sd = sd_rating,
  total = total_reviews
)
print(ecuador_summary)
## mean sd total
## 1 3.164384 0.5122678 219
# Among Ecuador-bean chocolates, average the rating per manufacturer location
# and keep the row with the highest average.
country_Ecuador <- chocolate %>%
  filter(country_of_bean_origin == "Ecuador")
location_avg_rating <- aggregate(
  rating ~ company_location,
  data = country_Ecuador,
  FUN = mean
)
top_row <- which.max(location_avg_rating$rating)
best_location <- location_avg_rating[top_row, ]
print(best_location)
## company_location rating
## 2 Australia 3.8125
Australia makes the best chocolate (or has the highest ratings on average) with beans from Ecuador.
# Mean rating per bean-origin country (no minimum review count applied),
# sorted from highest to lowest; the first three rows are the top 3.
avg_rating_by_country <- aggregate(
  rating ~ country_of_bean_origin,
  data = chocolate,
  FUN = mean
)
descending_order <- order(-avg_rating_by_country$rating)
sorted_avg_rating <- avg_rating_by_country[descending_order, ]
top_3_countries <- head(sorted_avg_rating, 3)
print(top_3_countries)
## country_of_bean_origin rating
## 55 Tobago 3.625
## 8 China 3.500
## 43 Sao Tome & Principe 3.500
The top 3 countries (by bean origin) with the highest average ratings are Tobago, China, and Sao Tome & Principe.
# Number of reviews for each bean-origin country.
review_counts <- count(chocolate, country_of_bean_origin)
# Names of the countries that have at least 10 reviews.
countries_with_min_reviews <- pull(
  filter(review_counts, n >= 10),
  country_of_bean_origin
)
# Restrict the reviews to those countries.
filtered_chocolate <- filter(
  chocolate,
  country_of_bean_origin %in% countries_with_min_reviews
)
# Mean rating per remaining country.
avg_rating_by_country <- summarise(
  group_by(filtered_chocolate, country_of_bean_origin),
  mean_rating = mean(rating, na.rm = TRUE)
)
# Three countries with the highest mean rating.
top_3_countries <- slice_head(
  arrange(avg_rating_by_country, desc(mean_rating)),
  n = 3
)
print(top_3_countries)
## # A tibble: 3 × 2
## country_of_bean_origin mean_rating
## <chr> <dbl>
## 1 Solomon Islands 3.45
## 2 Congo 3.32
## 3 Cuba 3.29
The top 3 countries with the highest average chocolate ratings are the Solomon Islands (3.45), Congo (3.32), and Cuba (3.29).
library(dplyr)
# Reviews per bean-origin country, stored in a `total_reviews` column.
country_counts <- count(
  chocolate,
  country_of_bean_origin,
  name = "total_reviews"
)
# Countries with at least 50 reviews, and the reviews that belong to them.
countries_50_reviews <- filter(country_counts, total_reviews >= 50)
filtered_chocolate <- filter(
  chocolate,
  country_of_bean_origin %in% countries_50_reviews$country_of_bean_origin
)
table(filtered_chocolate$country_of_bean_origin)
##
## Belize Blend Bolivia Brazil
## 76 156 80 78
## Colombia Dominican Republic Ecuador Guatemala
## 79 226 219 62
## Madagascar Mexico Nicaragua Papua New Guinea
## 177 55 100 50
## Peru Tanzania Venezuela Vietnam
## 244 79 253 73
# table(chocolate$country_of_bean_origin)
library(forcats)
# Bin every review by cocoa content into four ordered groups.
#
# `cocoa_percent` is stored as text such as "76%". Comparing that text with a
# number compares strings, so e.g. "100%" sorts before "60" and would be
# labelled "<60%". Strip the percent sign and convert to numeric first.
#
# cut(..., right = FALSE) builds left-closed intervals:
#   [-Inf, 60) [60, 70) [70, 90) [90, Inf)
# and returns a factor whose levels are already in this order.
#
# NOTE(review): group counts and averages echoed later in this report were
# produced with the string comparison and may change when re-knitted.
filtered_chocolate <- filtered_chocolate %>%
  mutate(
    percent_group = cut(
      as.numeric(sub("%", "", cocoa_percent, fixed = TRUE)),
      breaks = c(-Inf, 60, 70, 90, Inf),
      labels = c("<60%", ">=60 to <70%", ">=70 to <90%", ">=90%"),
      right = FALSE
    )
  )
table(filtered_chocolate$percent_group)
##
## <60% >=60 to <70% >=70 to <90% >=90%
## 41 270 1682 14
head(filtered_chocolate, 20)
## # A tibble: 20 × 11
## ref company_manufacturer company_location review_date
## <dbl> <chr> <chr> <dbl>
## 1 2454 5150 U.S.A. 2019
## 2 2458 5150 U.S.A. 2019
## 3 2454 5150 U.S.A. 2019
## 4 2546 5150 U.S.A. 2021
## 5 797 A. Morin France 2012
## 6 797 A. Morin France 2012
## 7 1015 A. Morin France 2013
## 8 1011 A. Morin France 2013
## 9 1011 A. Morin France 2013
## 10 1015 A. Morin France 2013
## 11 1019 A. Morin France 2013
## 12 1019 A. Morin France 2013
## 13 1011 A. Morin France 2013
## 14 1015 A. Morin France 2013
## 15 1019 A. Morin France 2013
## 16 1315 A. Morin France 2014
## 17 1315 A. Morin France 2014
## 18 1319 A. Morin France 2014
## 19 1319 A. Morin France 2014
## 20 1704 A. Morin France 2015
## # ℹ 7 more variables: country_of_bean_origin <chr>,
## # specific_bean_origin_or_bar_name <chr>, cocoa_percent <chr>,
## # ingredients <chr>, most_memorable_characteristics <chr>, rating <dbl>,
## # percent_group <fct>
# Make sure percent_group is a factor whose levels run from the lowest to the
# highest cocoa-percentage group, then show the level order.
filtered_chocolate <- mutate(
  filtered_chocolate,
  percent_group = fct_relevel(
    as.factor(percent_group),
    "<60%", ">=60 to <70%", ">=70 to <90%", ">=90%"
  )
)
levels(filtered_chocolate$percent_group)
## [1] "<60%" ">=60 to <70%" ">=70 to <90%" ">=90%"
library(ggplot2)
# Boxplots of rating per cocoa-percentage group, one panel per bean-origin
# country; x-axis labels are rotated 45 degrees.
ggplot(
  data = filtered_chocolate,
  mapping = aes(percent_group, rating, fill = percent_group)
) +
  geom_boxplot() +
  facet_wrap(vars(country_of_bean_origin)) +
  ggtitle("Boxplots of Ratings by Chocolate Percentage Groups") +
  xlab("Chocolate Percentage Group") +
  ylab("Rating") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))
# Mean rating within each cocoa-percentage group (missing ratings ignored).
average_ratings <- summarise(
  group_by(filtered_chocolate, percent_group),
  avg_rating = mean(rating, na.rm = TRUE)
)
print(average_ratings)
## # A tibble: 4 × 2
## percent_group avg_rating
## <fct> <dbl>
## 1 <60% 2.66
## 2 >=60 to <70% 3.25
## 3 >=70 to <90% 3.22
## 4 >=90% 2.75
## The >=60 to <70% group has the highest average rating (3.246). ## This ranking is broadly consistent across countries.
##Solution
# Point install.packages() at the cloud CRAN mirror so that installation
# works without an interactive mirror prompt.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install gapminder only when it is missing. requireNamespace() checks for the
# package without attaching it; a previous unconditional install.packages()
# call re-downloaded the package on every run.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder")
}
# Attach once; library() stops with an error if the package is unavailable.
library(gapminder)
# One row per gapminder country with its continent.
gapminder_continents <- distinct(select(gapminder, country, continent))
# Attach the continent to each review by matching the bean-origin country to
# the gapminder country name; countries without a match get NA.
chocolate_with_continent <- left_join(
  filtered_chocolate,
  gapminder_continents,
  by = c("country_of_bean_origin" = "country")
)
head(chocolate_with_continent)
## # A tibble: 6 × 12
## ref company_manufacturer company_location review_date country_of_bean_origin
## <dbl> <chr> <chr> <dbl> <chr>
## 1 2454 5150 U.S.A. 2019 Tanzania
## 2 2458 5150 U.S.A. 2019 Dominican Republic
## 3 2454 5150 U.S.A. 2019 Madagascar
## 4 2546 5150 U.S.A. 2021 Venezuela
## 5 797 A. Morin France 2012 Bolivia
## 6 797 A. Morin France 2012 Peru
## # ℹ 7 more variables: specific_bean_origin_or_bar_name <chr>,
## # cocoa_percent <chr>, ingredients <chr>,
## # most_memorable_characteristics <chr>, rating <dbl>, percent_group <fct>,
## # continent <fct>
library(dplyr)
# Reviews per bean-origin country.
country_counts <- chocolate_with_continent %>%
  count(country_of_bean_origin, name = "total_reviews")
# Keep countries with at least 10 reviews and drop the "Blend" category.
# NOTE(review): chocolate_with_continent is built from the >= 50-review subset,
# so the >= 10 threshold removes nothing here -- confirm this is intended.
#
# The gapminder join leaves `continent` missing for 126 rows, which is exactly
# Belize (76) plus Papua New Guinea (50). Fill those two in by hand so no
# review is plotted under an "NA" continent.
chocolate_filtered <- chocolate_with_continent %>%
  filter(
    country_of_bean_origin %in%
      country_counts$country_of_bean_origin[country_counts$total_reviews >= 10],
    country_of_bean_origin != "Blend"
  ) %>%
  mutate(
    continent = case_when(
      country_of_bean_origin == "Belize" ~ "Americas",
      country_of_bean_origin == "Papua New Guinea" ~ "Oceania",
      TRUE ~ as.character(continent)
    ),
    continent = factor(continent)
  )
table(chocolate_with_continent$country_of_bean_origin)
##
## Belize Blend Bolivia Brazil
## 76 156 80 78
## Colombia Dominican Republic Ecuador Guatemala
## 79 226 219 62
## Madagascar Mexico Nicaragua Papua New Guinea
## 177 55 100 50
## Peru Tanzania Venezuela Vietnam
## 244 79 253 73
# Every remaining country now has a continent.
sum(is.na(chocolate_filtered$continent))
## [1] 0
table(chocolate_filtered$country_of_bean_origin)
##
## Belize Bolivia Brazil Colombia
## 76 80 78 79
## Dominican Republic Ecuador Guatemala Madagascar
## 226 219 62 177
## Mexico Nicaragua Papua New Guinea Peru
## 55 100 50 244
## Tanzania Venezuela Vietnam
## 79 253 73
library(ggplot2)
# Distribution of ratings per continent, drawn as violins filled by continent;
# x-axis labels are rotated 45 degrees.
ggplot(
  data = chocolate_filtered,
  mapping = aes(continent, rating, fill = continent)
) +
  geom_violin() +
  ggtitle("Violin Plot of Ratings by Continent") +
  xlab("Continent") +
  ylab("Rating") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))
# # install.packages(c("dplyr", "stringr", "tidyr"))
#
library(dplyr)
library(stringr)
# Try to attach dplyr quietly and print whether that succeeded.
# require() returns TRUE/FALSE rather than raising an error.
print(
  if (require("dplyr", quietly = TRUE)) {
    "dplyr is installed"
  } else {
    "dplyr is not installed"
  }
)
## [1] "dplyr is installed"
# Add one 0/1 indicator column per ingredient code found in `ingredients`
# (NA when `ingredients` is missing).
#
# The sugar code "S" is a prefix of the salt code "Sa", so a plain "S" pattern
# also fires on salt. The negative look-ahead "S(?!a)" matches an "S" only
# when it is not followed by "a".
# NOTE(review): if the data also uses "S*" for a non-sugar sweetener, exclude
# it as well with "S(?![a*])" -- confirm against the data dictionary.
# NOTE(review): yearly sugar means echoed later in this report were produced
# with the plain "S" pattern and may change when re-knitted.
chocolate <- chocolate %>%
  mutate(
    beans = as.numeric(str_detect(ingredients, "B")),
    sugar = as.numeric(str_detect(ingredients, "S(?!a)")),
    cocoa_butter = as.numeric(str_detect(ingredients, "C")),
    vanilla = as.numeric(str_detect(ingredients, "V")),
    lecithin = as.numeric(str_detect(ingredients, "L")),
    salt = as.numeric(str_detect(ingredients, "Sa"))
  )
library(dplyr)
library(stringr)
# Add 0/1 indicator columns (NA when the source text is missing):
#   char_*  -- the word appears in `most_memorable_characteristics`
#   others  -- the ingredient code appears in `ingredients`
#
# The sugar code "S" is a prefix of the salt code "Sa", so a plain "S" pattern
# also fires on salt. The negative look-ahead "S(?!a)" matches an "S" only
# when it is not followed by "a".
# NOTE(review): if the data also uses "S*" for a non-sugar sweetener, exclude
# it as well with "S(?![a*])" -- confirm against the data dictionary.
# NOTE(review): yearly sugar means echoed later in this report were produced
# with the plain "S" pattern and may change when re-knitted.
chocolate <- chocolate %>%
  mutate(
    char_cocoa = as.numeric(str_detect(most_memorable_characteristics, "cocoa")),
    char_sweet = as.numeric(str_detect(most_memorable_characteristics, "sweet")),
    char_nutty = as.numeric(str_detect(most_memorable_characteristics, "nutty")),
    char_creamy = as.numeric(str_detect(most_memorable_characteristics, "creamy")),
    char_roasty = as.numeric(str_detect(most_memorable_characteristics, "roasty")),
    char_earthy = as.numeric(str_detect(most_memorable_characteristics, "earthy")),
    beans = as.numeric(str_detect(ingredients, "B")),
    sugar = as.numeric(str_detect(ingredients, "S(?!a)")),
    cocoa_butter = as.numeric(str_detect(ingredients, "C")),
    vanilla = as.numeric(str_detect(ingredients, "V")),
    lecithin = as.numeric(str_detect(ingredients, "L")),
    salt = as.numeric(str_detect(ingredients, "Sa"))
  )
# Per review year, the mean of every 0/1 indicator column, i.e. the share of
# reviews in that year that have the feature. Output columns are named
# mean_<indicator>, in the order the indicators are listed.
chocolate_summary <- chocolate %>%
  group_by(review_date) %>%
  summarise(
    across(
      c(
        char_cocoa, char_sweet, char_nutty, char_creamy, char_roasty,
        char_earthy, beans, sugar, cocoa_butter, vanilla, lecithin, salt
      ),
      ~ mean(.x, na.rm = TRUE),
      .names = "mean_{.col}"
    )
  )
print(chocolate_summary)
## # A tibble: 16 × 13
## review_date mean_char_cocoa mean_char_sweet mean_char_nutty mean_char_creamy
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2006 0.210 0.161 0.0323 0.242
## 2 2007 0.342 0.0959 0.0411 0.233
## 3 2008 0.109 0.130 0.152 0.0978
## 4 2009 0.146 0.154 0.154 0.0894
## 5 2010 0.218 0.1 0.145 0.0909
## 6 2011 0.172 0.110 0.117 0.129
## 7 2012 0.0876 0.139 0.103 0.0722
## 8 2013 0.175 0.126 0.115 0.0710
## 9 2014 0.0607 0.0972 0.158 0.0486
## 10 2015 0.127 0.106 0.109 0.0423
## 11 2016 0.0922 0.171 0.157 0.0553
## 12 2017 0.133 0.0952 0.0667 0.0952
## 13 2018 0.180 0.118 0.0789 0.0439
## 14 2019 0.259 0.145 0.0725 0.0881
## 15 2020 0.284 0.160 0.0494 0.0370
## 16 2021 0.297 0.126 0.0971 0.0171
## # ℹ 8 more variables: mean_char_roasty <dbl>, mean_char_earthy <dbl>,
## # mean_beans <dbl>, mean_sugar <dbl>, mean_cocoa_butter <dbl>,
## # mean_vanilla <dbl>, mean_lecithin <dbl>, mean_salt <dbl>
# Reshape to long form: one row per (review year, feature) with the yearly
# mean in `mean_score`. Every column except review_date is a mean_* column.
chocolate_long <- pivot_longer(
  chocolate_summary,
  cols = !review_date,
  names_to = "feature",
  values_to = "mean_score"
)
print(chocolate_long)
## # A tibble: 192 × 3
## review_date feature mean_score
## <dbl> <chr> <dbl>
## 1 2006 mean_char_cocoa 0.210
## 2 2006 mean_char_sweet 0.161
## 3 2006 mean_char_nutty 0.0323
## 4 2006 mean_char_creamy 0.242
## 5 2006 mean_char_roasty 0.0484
## 6 2006 mean_char_earthy 0.0645
## 7 2006 mean_beans 1
## 8 2006 mean_sugar 1
## 9 2006 mean_cocoa_butter 0.933
## 10 2006 mean_vanilla 0.717
## # ℹ 182 more rows
library(ggplot2)
# Baseline trend plot: yearly mean of each feature as points with a loess
# curve (no confidence band), one panel per feature with its own y scale.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot with enlarged points (size = 3).
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point(size = 3) +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
1. Large, messy points: Using a large point size (e.g., size = 3) can clutter the plot. This underscores the need to choose a point size that balances emphasis and clarity in visualizations.
# Variant of the trend plot that adds a dotted line connecting the yearly
# points on top of the loess curve.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  geom_line(linetype = "dotted") +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot whose title is set in small (size 8) italics.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 8, face = "italic")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot with a chartreuse subtitle and no extra margin
# below the subtitle.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(color = "chartreuse", size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot with a tiny (size 5), centred, dark-orange italic
# caption.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(
      face = "italic",
      color = "darkorange",
      hjust = 0.5,
      size = 5
    ),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot whose title contains markdown-style markup and
# emoji characters, passed to ggplot as a plain string.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "~~Mean Chocolate Feature Scores~~ *Over* **Time** 🌟🎉",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Variant of the trend plot with a long, wordy y-axis label.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Score (mean for every single chocolate ever reviewed)") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
###Part 6: Make my plot a better plot!
# Dodged bar chart of the yearly mean score, one bar per feature within each
# review year. geom_col() draws bars whose heights are the data values.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, fill = feature)
) +
  geom_col(position = "dodge") +
  ggtitle("Mean Chocolate Feature Scores by Year") +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    panel.spacing = unit(1, "lines"),
    panel.background = element_rect(),
    legend.text = element_text(size = 10),
    legend.position = "right",
    axis.title.y = element_text(size = 12),
    axis.title.x = element_text(size = 12),
    axis.text.x = element_text(hjust = 1, angle = 45),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
# Trend plot with white panel backgrounds and x-axis labels rotated 45 degrees.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 45),
    panel.background = element_rect(fill = "white"),
    strip.text = element_text(size = 12, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Trend plot with semi-transparent points, a black loess curve, light-grey
# facet strips, wider spacing between panels and white panel backgrounds.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "loess", se = FALSE, color = "black") +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 45),
    panel.background = element_rect(fill = "white"),
    panel.spacing = unit(2, "lines"),
    strip.background = element_rect(fill = "lightgrey"),
    strip.text = element_text(face = "bold", size = 10),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Trend plot with semi-transparent points, a loess curve coloured per feature,
# white panel backgrounds and rotated x-axis labels.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 45),
    panel.background = element_rect(fill = "white"),
    strip.text = element_text(face = "bold", size = 10),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Trend plot with semi-transparent points and a black loess curve, white panel
# backgrounds and rotated x-axis labels.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "loess", se = FALSE, color = "black") +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 45),
    panel.background = element_rect(fill = "white"),
    strip.text = element_text(face = "bold", size = 10),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Trend plot with smaller facet labels (size 10) and wider spacing between
# panels.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    panel.spacing = unit(2, "lines"),
    strip.text = element_text(size = 10, face = "bold"),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
# Trend plot drawn with point shape 17 and a black loess curve, white panel
# backgrounds and rotated x-axis labels.
ggplot(
  data = chocolate_long,
  mapping = aes(review_date, mean_score, colour = feature)
) +
  geom_point(shape = 17) +
  geom_smooth(method = "loess", se = FALSE, color = "black") +
  facet_wrap(vars(feature), scales = "free_y") +
  ggtitle(
    "Mean Chocolate Feature Scores Over Time",
    subtitle = "Visualizing the trends in characteristics and ingredients across review years"
  ) +
  xlab("Year of Review") +
  ylab("Mean Score") +
  labs(caption = "Data visualization by: AYUSHI GUPTA") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 45),
    panel.background = element_rect(fill = "white"),
    strip.text = element_text(face = "bold", size = 10),
    plot.caption = element_text(hjust = 1, size = 10),
    plot.subtitle = element_text(margin = margin(b = 10), size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'